library(xml2)
library(httr)
## Warning: package 'httr' was built under R version 4.2.3
library(rvest)
## Warning: package 'rvest' was built under R version 4.2.3
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Fetch the DataTrail demo page and print the text of every <strong> element.
r <- read_html("https://datatrail-jhu.github.io/stable_website/webscrape.html")
s <- r %>% html_nodes("strong")
t <- s %>% html_text()
print(t)
## [1] "rvest" "httr" "dbplyr" "jsonlite" "googlesheets"
# Fetch the BBC front page and print the whitespace-trimmed headline texts.
r2 <- read_html("https://www.bbc.com")
# ".media__link" is a CSS class selector. The original note says it can be
# found with a browser extension (name truncated in the source — presumably
# SelectorGadget; confirm).
s2 <- r2 %>% html_nodes(".media__link")
t2 <- s2 %>% html_text()
t2 %>%
  trimws() %>%
  print()
## [1] "Biden defends sending cluster bombs to Ukraine"
## [2] "Why is US giving Ukraine 'abhorrent' weapons?"
## [3] "Zelensky visits Snake Island as war enters 500th day"
## [4] "Ashes news: Build-up to crucial day of third England-Australia Test"
## [5] "Dutch government collapses over asylum row"
## [6] "Can France prevent tensions igniting again?"
## [7] "AI robot asked 'will you rebel against humans'?"
## [8] "No charges for security who blocked Britney Spears"
## [9] "Murray unsure he has motivation for Wimbledon return"
## [10] "England are in a 'winnable' position - Moeen"
## [11] "Van der Sar in intensive care after bleed on brain"
## [12] "The islands gifted as an unpaid debt"
## [13] "Sixteen of the best films of 2023"
## [14] "The maps revealing urban heat stress"
## [15] "Pressure builds on S Korea to send Ukraine stockpiled ammo"
## [16] "Coco Lee death sparks China mental health discussion"
## [17] "What Asian fans did for Taylor Swift concert tickets"
## [18] "Australian welfare hunt caused suicides - inquiry"
## [19] "Why Wimbledon’s dress code is so strict"
## [20] "What would green shipping look like?"
## [21] "Rava upma: warm and savoury semolina"
## [22] "The rise of job-searching burnout"
## [23] "Why America could be overtaken as the corn superpower"
## [24] "Bruce Springsteen settles an old score in Hyde Park"
## [25] "'Almost every influencer will be hopping on Threads'"
## [26] "The latest technology news direct to your inbox"
## [27] "Theatres tempt new audiences with virtual reality"
## [28] "BBC visits Belarus camp offered to Wagner"
## [29] "BBC visits Belarus camp offered to Wagner"
## [30] "Hail batters Spain creating icy urban scenes"
## [31] "At the scene the day after fatal Wimbledon..."
## [32] "Belarus leader pressed on nuclear weapons"
## [33] "Where is Yevgeny Prigozhin? And why does it..."
## [34] "Are wildfires in the US getting worse?"
## [35] "Australians smash Tina Turner dancing world..."
## [36] "Watch: Europe’s last Ariane-5 rocket blasts..."
## [37] "One-minute World News"
## [38] "Best in show: Africa's top shots"
## [39] "Young adults see rise in severe distress - study"
## [40] "BBC star 'accused of paying teen for explicit photos'"
## [41] "Shipping agrees net-zero goal but critics unmoved"
## [42] "Malmö is Swedish city chosen to host Eurovision"
## [43] "Canada stops advertising with Facebook in news row"
## [44] "World records hottest day for third time in a week"
## [45] "Why this music producer smashed his platinum discs"
## [46] "Why Europe's 'lonely' tech entrepreneurs need help"
## [47] "In pictures: King Charles III celebrations"
## [48] "Mystery of Holocaust escape girls finally solved"
## [49] "Eid al-Adha around the world in pictures"
## [50] "Photographer shares shots of famous musicians"
## [51] "Millinery masterpieces at Royal Ascot"
##################################################
# List the repositories returned by the GitHub API for one user as a
# two-column tibble: repository name and its web address.
gitresp <- GET("https://api.github.com/users/abidalishaikh/repos")
# Fail fast on an HTTP error (e.g. rate limiting). An error payload is a list
# of plain strings, so `x$name` below would otherwise fail with an unrelated
# "$ operator is invalid for atomic vectors" message.
stop_for_status(gitresp)
gitcontent <- content(gitresp)
lapply(gitcontent, function(x) {
  # tibble() replaces data_frame(), which is deprecated since tibble 1.1.0.
  tibble(repo = x$name, address = x$html_url)
}) %>%
  bind_rows()
## # A tibble: 22 × 2
## repo address
## <chr> <chr>
## 1 07_RegressionModels https://github.com/AbidAliShaikh/07_RegressionModels
## 2 AbidAliShaikh https://github.com/AbidAliShaikh/AbidAliShaikh
## 3 courses https://github.com/AbidAliShaikh/courses
## 4 Distill https://github.com/AbidAliShaikh/Distill
## 5 edoc https://github.com/AbidAliShaikh/edoc
## 6 education https://github.com/AbidAliShaikh/education
## 7 ExData_Plotting1 https://github.com/AbidAliShaikh/ExData_Plotting1
## 8 fastdjango https://github.com/AbidAliShaikh/fastdjango
## 9 ggplot https://github.com/AbidAliShaikh/ggplot
## 10 gitSurveys https://github.com/AbidAliShaikh/gitSurveys
## # … with 12 more rows
##########################################
# Download the FiveThirtyEight steak/risk survey and parse it into a tibble.
surv <- GET("https://raw.githubusercontent.com/fivethirtyeight/data/master/steak-survey/steak-risk-survey.csv")
# Stop on an HTTP error instead of parsing the error body as if it were CSV.
stop_for_status(surv)
# Pass the encoding explicitly so content() does not have to guess it; this
# also removes the "No encoding supplied" message.
df_surv <- content(surv, type = "text/csv", encoding = "UTF-8")
## Rows: 551 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): Consider the following hypothetical situations: <br>In Lottery A, ...
## dbl (1): RespondentID
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Auto-print the parsed survey tibble to inspect its columns and first rows.
df_surv
## # A tibble: 551 × 15
## RespondentID Consid…¹ Do yo…² Do yo…³ Do yo…⁴ Have …⁵ Do yo…⁶ Have …⁷ Do yo…⁸
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 NA Response Respon… Respon… Respon… Respon… Respon… Respon… Respon…
## 2 3237565956 Lottery… <NA> <NA> <NA> <NA> <NA> <NA> <NA>
## 3 3234982343 Lottery… No Yes No No No No Yes
## 4 3234973379 Lottery… No Yes Yes No Yes Yes Yes
## 5 3234972383 Lottery… Yes Yes Yes No Yes Yes Yes
## 6 3234958833 Lottery… No Yes No No Yes Yes Yes
## 7 3234955240 Lottery… No No No No Yes No Yes
## 8 3234955097 Lottery… No Yes No No Yes Yes No
## 9 3234955010 Lottery… No Yes Yes Yes Yes No Yes
## 10 3234953052 Lottery… Yes Yes Yes No Yes No Yes
## # … with 541 more rows, 6 more variables:
## # `How do you like your steak prepared?` <chr>, Gender <chr>, Age <chr>,
## # `Household Income` <chr>, Education <chr>,
## # `Location (Census Region)` <chr>, and abbreviated variable names
## # ¹`Consider the following hypothetical situations: <br>In Lottery A, you have a 50% chance of success, with a payout of $100. <br>In Lottery B, you have a 90% chance of success, with a payout of $20. <br><br>Assuming you have $10 to bet, would you play Lottery A or Lottery B?`,
## # ²`Do you ever smoke cigarettes?`, ³`Do you ever drink alcohol?`,
## # ⁴`Do you ever gamble?`, ⁵`Have you ever been skydiving?`, …
###### Alternatively, read_csv() can also be used to read this file
# Downloading files for offline scraping
# Save a local copy of the page so it can be parsed from disk.
download.file(url = "https://www.ibm.com/", destfile = "ibm.html")
root_nod1 <- rvest::read_html("ibm.html")
# NOTE(review): the disabled draft below calls read_html() on already-parsed
# nodes and read_text(), which is not an rvest function — presumably
# html_node()/html_text() were intended. Confirm before re-enabling.
# #root_html <- rvest::read_html(root_nod1,"html")
# body_nod <- rvest::read_html(root_nod1,"body")
# p_nod <- rvest::read_html(body_nod,"p")
# p_content <- read_text(p_nod)
########################
# Select every <table> element and convert the selection with html_table();
# the result is a list (empty when the page has no tables).
table_nod <- root_nod1 %>% rvest::html_nodes("table")
c_dataframe <- table_nod %>% html_table()
c_dataframe
## list()